***UNMET NEED: COUNTERFACTUAL ANALYSIS
***REPLICATION DO FILE
***MAHESH KARRA
***VERSION: 5-10-21

***SET WORKING DIRECTORY
global maindir "SET WORKING DIRECTORY"

cd "$maindir"

use "$maindir\DATA FILE.dta", clear

***SET GRAPH FONT
graph set window fontface "Times New Roman"
graph set print fontface "Times New Roman"

*****YEAR AND COUNTRY DUMMIES
tab year, gen(year)

gen country = substr(v000,1,2)
tab country, gen(country)

****SURVEY WEIGHTS
*Generate weight
gen weight = v005/1000000

****GENERATE UNIQUE CLUSTER VARIABLE
tostring v001, replace
gen uniq_cluster = v000 + " " + v001
destring v001, replace

****GENERATE UNIQUE HHID VARIABLE
tostring v002, replace
gen hhid = uniq_cluster + " " + v002
destring v002, replace

*Make unique strata values by region/urban-rural (label option automatically labels the results)
egen strata = group(v024 v025), label
*Check results
tab strata

***GENERATE MOST RECENT SURVEY YEAR
bysort country: egen max_year = max(year)

*Set the weight (using pweights for robust standard errors), cluster (psu), and strata:
svyset [pweight=weight], psu(uniq_cluster) strata(strata) || hhid

*****ENCODE SURVEY VARIABLE FROM 
encode v000, gen(survey)

***GENERATE METHOD MIX VARIABLE
clonevar method_mix = v312
replace method_mix = . if v312==.

***METHOD DUMMIES
tab method_mix, gen(method_mix_)

***GENERATE ETHNICITY
gen ethnicity = v131
replace ethnicity = . if v131==.

***GENERATE REGION
gen region = v024
replace region = . if v024==.

***GENERATE CONTRACEPTIVE USE DUMMY
gen cont_use = v313 > 0
replace cont_use = . if v313==.

bysort survey: egen cont_use_svy = mean(cont_use)

label define contuse 0 "Not using" 1 "Using FP"
label values cont_use contuse

***GENERATE CONTRACEPTIVE USE CATEGORIES
recode v313 (0 = 0 "No Method")(1 2 = 1 "Traditional Methods")(3 = 2 "Modern Methods"), gen(cont_use_cat)
replace cont_use_cat = . if v313==.

***GENERATE IDEAL CONTRACEPTIVE USE
*WEALTHIEST QUINTILE: v190==5
*CURRENTLY MARRIED OR IN UNION: v501==1 | v501==2
*HIGH EDUCATED: v106==3
*ELECTRICITY: v119==1
*OWN CAR: v125==1
*AUTONOMY OVER HEALTH CARE: v743a==1
*KNOWS METHODS: v301 > 1
*NO PROBLEM GETTING PERMISSION TO GO, v467b==0
*NO PROBLEM DISTANCE TO FACILITY, v467d==0
gen high_quint = v190==5
replace high_quint = . if v190==.
gen curr_married = v501==1 | v501==2
replace curr_married = . if v501==.
gen high_educ = v106==3
replace high_educ = . if v106==.
gen electricity = v119==1
replace electricity = . if v119==.
gen car = v125==1
replace car = . if v125==.
gen aut_hc = v743a==1
replace aut_hc = . if v743a==.
gen knows_method = v301 > 0
replace knows_method = . if v301==.
gen no_problem_permission = v467b==0 | v467b==2
replace no_problem_permission = . if v467b==.
gen no_problem_distance = v467d==0 | v467d==2
replace no_problem_distance = . if v467d==.

***GENERATE SAMPLE DENOMINATOR
*Currently married
*Sexually active in last 4 weeks (30 days), v536==1 | v528 < 30
gen sex_active = v536==1 | v528 < 30
replace sex_active = . if v536==. | v528==.
*Not currently amenorrheic, v405==0
gen not_curr_amenorr = v405==0
replace not_curr_amenorr = . if v405==.

gen denom = curr_married==1 | sex_active==1 | not_curr_amen==1
replace denom = . if curr_married==. & sex_active==. & not_curr_amen==.


bysort survey: egen ideal_cont_use = mean(cont_use) if high_quint==1 & high_educ==1 & knows_method==1 & no_problem_distance==1 & (curr_married==1 | sex_active==1) & cont_use!=.
bysort survey: egen ideal_cont_use_r = mean(ideal_cont_use)

***GENERATE INDICATOR OF IDEAL ENVIRONMENT - DIFFERENT FACTORS
gen ideal = high_quint==1 & high_educ==1 & knows_method==1 & no_problem_distance==1 & (curr_married==1 | sex_active==1) & cont_use!=.
gen ideal_no_educ = high_quint==1 & knows_method==1 & no_problem_distance==1 & (curr_married==1 | sex_active==1) & cont_use!=.
gen ideal_no_quin = high_educ==1 & knows_method==1 & no_problem_distance==1 & (curr_married==1 | sex_active==1) & cont_use!=.
gen ideal_no_knows = high_quint==1 & high_educ==1 & no_problem_distance==1 & (curr_married==1 | sex_active==1) & cont_use!=.
gen ideal_no_dist = high_quint==1 & high_educ==1 & knows_method==1 & (curr_married==1 | sex_active==1) & cont_use!=.
gen ideal_no_marr = high_quint==1 & high_educ==1 & knows_method==1 & no_problem_distance==1 & cont_use!=.

***GENERATE UNMET NEED WITH COUNTERFACTUAL CONTRACEPTIVE USE DEFINITION
gen unmet_need = ideal_cont_use_r - cont_use_svy

***UNMET NEED VARIABLES
*DEFINITION 1
gen unmet_need_1 = v624==1 | v624==2
replace unmet_need_1 = . if v624==.
bysort survey: egen unmet_need_1_r = mean(unmet_need_1)

*DEFINITION 2
gen unmet_need_2 = v626==1 | v626==2
replace unmet_need_2 = . if v626==.
bysort survey: egen unmet_need_2_r = mean(unmet_need_2)

***DIFFERENCES IN DEFINITIONS
*DEFINITION 1
gen diff_1 = unmet_need - unmet_need_1_r
*DEFINITION 2
gen diff_2 = unmet_need - unmet_need_2_r


***OTHER CHARACTERISTICS
*Urban, v025
gen urban = v025==1
*Age, v012
gen age = v012
*Children ever born, v201
gen ch_ever_born = v201
*Age at first cohabitation, v512
gen age_first_cohab = v512
replace age_first_cohab = . if v512 < 0 | v512==.
*Age at first sex, v525
gen age_first_sex = v525
replace age_first_sex = . if v525 > 97 | v525==.
*Husband has tertiary education, v701
gen husb_tert_educ = v701==3
replace husb_tert_educ = . if v701==.
*Respondent earns same or more than husband, v746
gen resp_earn_husb = v746==1 | v746==3
replace resp_earn_husb = . if v746==.
*Husband opposed to FP, v3a08j
gen husb_opposed = v3a08j==1
replace husb_opposed = . if v3a08j==.


*****DROP OBSERVATIONS THAT ARE INCOMPLETE IN COVARIATES, DISTANCE MEASURES, OR OUTCOME VARIABLES
drop if weight==0
*****DROP IF AGE > 49 OR AGE < 15
drop if age > 49 | age < 15
***KEEP ONLY THE SURVEYS AFTER 2010
keep if year > 2009

****COUNT THE NUMBER OF UNIQUE DHS SURVEYS
bysort v000: gen uniq_v000 = _n==1
count if uniq_v000==1

****COUNT THE NUMBER OF UNIQUE COUNTRIES
bysort country: gen uniq_countid = _n==1
count if uniq_countid==1

***COUNT THE RANGE OF YEARS
sum year

***GENERATE COUNTRY AND SURVEY YEAR LIST
bysort country year: gen uniq_csid = _n == 1
list v000 country year if uniq_csid==1

***COUNT NUMBER OF SURVEYS, COUNTRIES, YEARS FOR IDEAL SAMPLE
bysort v000 ideal: gen uniq_v000_ideal = _n==1
tab uniq_v000_ideal if ideal==1

bysort country ideal: gen uniq_count_ideal = _n==1
tab uniq_count_ideal if ideal==1

sum year if ideal==1

********************************************************************************
*******DESCRIPTIVE STATISTICS OF VARIABLES OF INTEREST
********************************************************************************


*DENSITY OF UNMET NEED
twoway kdensity unmet_need if uniq_v000==1 [aweight=v005], lcolor(red) || kdensity unmet_need_1_r if uniq_v000==1 [aweight=v005], lcolor(green) ///
|| kdensity unmet_need_2_r if uniq_v000==1 [aweight=v005], lcolor(blue) title("Kernel Density Plot of Unmet Need, 3 Definitions") xtitle("Unmet Need (%)") legend(label(1 "New Definition") label(2 "Definition 1") label(3 "Definition 2"))
graph export Unmet_Need_New.png, replace
graph export Unmet_Need_New.svg, replace
gr export Unmet_Need_New.eps, as(eps) preview(off) replace
!epstopdf Unmet_Need_New.eps

twoway kdensity diff_1 if uniq_v000==1 [aweight=v005], lcolor(red) || ///
|| kdensity diff_2 if uniq_v000==1 [aweight=v005], lcolor(blue) title("Kernel Density Plot of Differences in Unmet Need Definitions") xtitle("Difference (%)") legend(label(1 "New Definition - Definition 1") label(2 "New Definition - Definition 2"))
graph export Difference.png, replace
graph export Difference.svg, replace
gr export Difference.eps, as(eps) preview(off) replace
!epstopdf Difference.eps

***UNMET NEED SUMMARY STATISTICS
estpost sum unmet_need unmet_need_1_r unmet_need_2_r diff_1 diff_2 if uniq_v000==1 [fweight=v005]
esttab using Table_UNMET_NEED.rtf, cells("mean(fmt(3)) sd(fmt(3)) min(fmt(3)) max(fmt(3))") nomtitle nonumber replace

***LIST BY COUNTRY
list survey year ideal_cont_use_r cont_use_svy unmet_need unmet_need_1_r diff_1 unmet_need_2_r diff_2 if uniq_v000==1

***PLOT ICPR ACROSS COUNTRIES (AVERAGING ICPR BY COUNTRY FOR THOSE WITH MULTIPLE SURVEY WAVES)
bysort country: egen ideal_cpr_country = mean(ideal_cont_use_r)
list country ideal_cpr_country if uniq_countid==1

***DESCRIPTIVE STATISTICS OF WOMEN
estpost sum cont_use high_quint curr_marr sex_active high_educ knows_method no_problem_distance
esttab using Table_Descriptive.rtf, cells("mean(fmt(3)) sum(fmt(0))") nomtitle nonumber replace

estpost sum cont_use high_quint curr_marr sex_active high_educ knows_method no_problem_distance if ideal==1
esttab using Table_Descriptive_Ideal.rtf, cells("mean(fmt(3)) sum(fmt(0))") nomtitle nonumber replace

estpost sum urban age ch_ever_born age_first_cohab age_first_sex husb_tert_educ resp_earn_husb husb_opposed
esttab using Table_Characteristics.rtf, cells("mean(fmt(3)) sd(fmt(3)) sum(fmt(0))") nomtitle nonumber replace

estpost sum urban age ch_ever_born age_first_cohab age_first_sex husb_tert_educ resp_earn_husb husb_opposed if ideal==1
esttab using Table_Characteristics_Ideal.rtf, cells("mean(fmt(3)) sd(fmt(3)) sum(fmt(0))") nomtitle nonumber replace

***COUNTRY TABULATION
estpost tab country
esttab using Table_Country.rtf, cells("b(label(freq)) pct(fmt(2)) cumpct(fmt(2))") varlabels(`e(labels)') nonumber nomtitle noobs replace

estpost tab country if ideal==1
esttab using Table_Country_Ideal.rtf, cells("b(label(freq)) pct(fmt(2)) cumpct(fmt(2))") varlabels(`e(labels)') nonumber nomtitle noobs replace

***REGRESSION TO DECOMPOSE EFFECTS
logit cont_use high_quint curr_marr sex_active high_educ knows_method no_problem_distance i.survey, vce(cluster survey)
outreg2 using Reg_Dets.rtf, stats(coef ci) noparen dec(3) replace
logit cont_use i.high_quint i.curr_marr i.sex_active i.high_educ i.knows_method i.no_problem_distance i.survey, vce(cluster survey)
margins r.high_quint r.curr_marr r.sex_active r.high_educ r.knows_method r.no_problem_distance, contrast atmeans

***SENSITIVITY ANALYSIS OF IDEAL CONTRACEPTIVE USE - EXCLUDING FACTORS
estpost sum cont_use if ideal==1
esttab using Table_Sens_Cont_Use.rtf, cells("mean(fmt(3)) obs(fmt(0))") nomtitle nonumber replace
estpost sum cont_use if ideal_no_educ==1
esttab using Table_Sens_Cont_Use.rtf, cells("mean(fmt(3)) obs(fmt(0))") nomtitle nonumber append
estpost sum cont_use if ideal_no_quin==1
esttab using Table_Sens_Cont_Use.rtf, cells("mean(fmt(3)) obs(fmt(0))") nomtitle nonumber append
estpost sum cont_use if ideal_no_know==1
esttab using Table_Sens_Cont_Use.rtf, cells("mean(fmt(3)) obs(fmt(0))") nomtitle nonumber append
estpost sum cont_use if ideal_no_dist==1
esttab using Table_Sens_Cont_Use.rtf, cells("mean(fmt(3)) obs(fmt(0))") nomtitle nonumber append
estpost sum cont_use if ideal_no_marr==1
esttab using Table_Sens_Cont_Use.rtf, cells("mean(fmt(3)) obs(fmt(0))") nomtitle nonumber append
estpost sum cont_use
esttab using Table_Sens_Cont_Use.rtf, cells("mean(fmt(3)) obs(fmt(0))") nomtitle nonumber append





********************************************************************************
***MULTIPLE IMPUTATION / INDIVIDUAL MATCHING APPROACH TO UNMET NEED
********************************************************************************

***PROPENSITY (NEAREST NEIGHBOR) MATCHING BY AGE, ETHNICITY, REGION OF IDEAL AND NON-IDEAL WOMEN

levelsof survey, local(lev_survey)
keep if survey!=. & ethnicity!=.

foreach l of local lev_survey {
preserve
keep if survey==`l'
cap psmatch2 ideal age i.ethnicity, out(cont_use) noreplace n(1)
cap gen pair = _id if _treated==0
cap replace pair = _id if _treated==0
cap replace pair = _n1 if _treated==1
cap bysort survey pair: egen paircount = count(pair)
save "$maindir\UN_SURVEY`l'.dta", replace
restore
}

***MERGE DATA
use "$maindir\UN_SURVEY1.dta", clear
**MODIFY LINE 309 LOOP BASED ON THE NUMBER OF DHS SURVEYS TO BE APPENDED
forvalues i = 2/138 {
cap append using "$maindir\UN_SURVEY`i'.dta"
}
save "$maindir\UN_SURVEY_COMPLETE.dta", replace

***CONSTRUCTING CONTRACEPTIVE USE AND UNMET NEED INDICATORS:
keep if paircount >=2
drop if paircount==.

bysort survey: egen id_cont_use = mean(cont_use) if ideal==1
bysort survey: egen id_cont_use_r = mean(id_cont_use)
bysort survey: egen gen_cont_use = mean(cont_use) if ideal==0
bysort survey: egen gen_cont_use_r = mean(gen_cont_use)

gen unmet_need_match = id_cont_use_r - gen_cont_use_r
bysort survey: gen uniq_v000_match = _n==1

drop if unmet_need_match < 0
list unmet_need_match id_cont_use_r gen_cont_use_r survey if uniq_v000_match==1

*DENSITY OF UNMET NEED
twoway kdensity unmet_need_match if uniq_v000_match==1 [aweight=v005], lcolor(red) || kdensity unmet_need_1_r if uniq_v000_match==1 [aweight=v005], lcolor(green) ///
|| kdensity unmet_need_2_r if uniq_v000_match==1 [aweight=v005], lcolor(blue) title("Kernel Density Plot of Unmet Need, 3 Definitions") xtitle("Unmet Need (%)") legend(label(1 "Matched Definition") label(2 "Definition 1") label(3 "Definition 2"))
graph export Unmet_Need_Match.png, replace
graph export Unmet_Need_Match.svg, replace
gr export Unmet_Need_Match.eps, as(eps) preview(off) replace
!epstopdf Unmet_Need_Match.eps
